Choropleth maps display data on a map by shading regions with different colors.
In this lecture, we focus on packages ggmap, ggplot2, and tmap to create static maps.
|
Centers for Disease Control and Prevention provide a weekly influenza surveillance report using a choropleth map.
ggmapWe need an API key to access Google Maps for these exercises.
Note that you need to create a billing account before you use your API key in R.
For more info, see Get an API Key and Signature and Google Maps Platform.
register_google() function.library(ggmap)
# Insert your API key to get your google map for this session
register_google(key = "")
# Insert your API key to get your google map permanently
register_google(key = "[your key]", write = TRUE)
register_google()
google_key()
Use package ggmap.
Three functions:
qmap() plots a quick map
get_map() gets map data for a location.
ggmap() plots stored map data
register_google()
# Use qmap (quick map) to draw a city map
qmap("Washington DC", zoom=10)
# Try different values for zooming
qmap("Washington DC", zoom=15)
qmap("Washington DC", zoom=18)
qmap("Washington DC", zoom=7)
# Get the same map with get_map
dc_map <- get_map("Washington DC", zoom=10)
# And plot it
ggmap(dc_map) + labs(x="Longitude", y="Latitude")
Pinpoint specific locations on the map.
Geocoding converts a character string to a longitude and latitude location.
geocode() takes a location name as input and returns the latitude and longitude.
# Geocode locations of interest
dc <- geocode("Washington DC")
dc # the longitude and latitude values
## # A tibble: 1 × 2
## lon lat
## <dbl> <dbl>
## 1 -77.0 38.9
wh <- geocode("White House")
wh
## # A tibble: 1 × 2
## lon lat
## <dbl> <dbl>
## 1 -77.0 38.9
uva <- geocode("University of Virginia")
uva
## # A tibble: 1 × 2
## lon lat
## <dbl> <dbl>
## 1 -78.5 38.0
# Map a point
uva_map <- ggmap(get_map(uva))
uva_map
Terrain Map is the default map, and includes major place names and roads, but it also includes geographic terrian features.
Use ?get_map to see available types.
maptype = c("terrain", "terrain-background", "satellite", "roadmap", "hybrid",
"toner", "watercolor", "terrain-labels", "terrain-lines", "toner-2010", "toner-2011",
"toner-background", "toner-hybrid", "toner-labels", "toner-lines", "toner-lite")
Different types of maps are useful in different situations. For example,
# Try different map types
ggmap(get_map(uva, maptype = "terrain", zoom=13))
ggmap(get_map(uva, maptype = "terrain-lines", zoom=13))
ggmap(get_map(uva, maptype = "satellite", zoom=13))
ggmap(get_map(uva, maptype = "roadmap", zoom=13))
ggmap(get_map(uva, maptype = "hybrid", zoom=13))
ggmap(get_map(uva, maptype = "toner-lite", zoom=13))
# Geocode locations of interest
eus <- geocode("West Virgnia")
dc <- geocode("Washington DC")
# Create a map of the Eastern US
ggmap(get_map(eus))
eus_map <- ggmap(get_map(eus, zoom=5))
eus_map
# Add a location on the map
eus_map + geom_point(data=dc, mapping=aes(x=lon, y=lat), color="red")
# Add multiple datapoints
loc_names <- c("White House", "University of Virginia", "Statue of Liberty", "Boston University")
loc_values <- geocode(loc_names)
loc_dat <- data.frame(name=loc_names, lat=loc_values$lat, lon=loc_values$lon)
# Add multiple locations on the map
eus_map + geom_point(data=loc_dat, mapping=aes(x=lon, y=lat), color="red")
# Add their names as text and adjust the label position
eus_map + geom_point(data=loc_dat, mapping=aes(x=lon, y=lat), color="red") +
geom_text(data=loc_dat, mapping=aes(x=lon, y=lat, label=name), nudge_y=-0.5, nudge_x=1, size=2, color="red")
# Try a different map type.
ggmap(get_map(eus, zoom=5, maptype = "terrain-background")) +
geom_point(data=loc_dat, mapping=aes(x=lon, y=lat), color="red") +
geom_text(data=loc_dat, mapping=aes(x=lon, y=lat, label=name), nudge_y=-0.5, nudge_x=1, size=2, color="red")
ggmap(get_map(eus, zoom=5, maptype = "toner-background")) +
geom_point(data=loc_dat, mapping=aes(x=lon, y=lat), color="red") +
geom_text(data=loc_dat, mapping=aes(x=lon, y=lat, label=name), nudge_y=-0.5, nudge_x=1, size=2, color="red")
ggmap(get_map(eus, zoom=5, maptype = "watercolor")) +
geom_point(data=loc_dat, mapping=aes(x=lon, y=lat), color="red") +
geom_text(data=loc_dat, mapping=aes(x=lon, y=lat, label=name), nudge_y=-0.5, nudge_x=1, size=2, color="red")
ggplot2ggplot2 provides basic maps that allow for advanced visualizations.
The map_data() function from ggplot2 gets the set of points that define a region’s boundaries.
If you don’t define the group aesthetic, you’re drawing one single polygon that connects all the data points rather than a set of 48 different polygons from state map data.
# Load the state map data
states <- map_data("state")
# Inspect the state data
knitr::kable(head(states)) # The data points define each state as an individual polygon.
| long | lat | group | order | region | subregion |
|---|---|---|---|---|---|
| -87.46201 | 30.38968 | 1 | 1 | alabama | NA |
| -87.48493 | 30.37249 | 1 | 2 | alabama | NA |
| -87.52503 | 30.37249 | 1 | 3 | alabama | NA |
| -87.53076 | 30.33239 | 1 | 4 | alabama | NA |
| -87.57087 | 30.32665 | 1 | 5 | alabama | NA |
| -87.58806 | 30.32665 | 1 | 6 | alabama | NA |
# Plot the state data
ggplot(data=states, mapping=aes(x=long, y=lat)) +
geom_polygon()
# Improve the plot
ggplot(data=states, mapping=aes(x=long, y=lat, group=group)) +
geom_polygon() +
coord_map() + # fix the coordinate system
theme_bw() + # change the background
labs(x='Longitude', y = 'Latitude') # add x and y labels
I use a dataset (created on December 6th, 2020) on state voting counts from NBC news, along with States Latitudes and Longitudes from Google.
I create voting percentages within the range of [0.4, 0.6] so that I could eventually color the map red and blue in a manner that closely reflected population political leanings.
US2020_Election <- readxl::read_excel("Presidential_Results_Dec6_2020.xlsx") %>%
mutate(Total = Trump+Biden) %>%
mutate(Trump_Percent =
ifelse(Trump/Total>0.6, 0.6, ifelse(Trump/Total<0.4, 0.4, Trump/Total)))
US2020_Election %>% head()
## # A tibble: 6 × 8
## Biden Trump State_Abbreviat… Lat Lon State_Name Total Trump_Percent
## <dbl> <dbl> <chr> <dbl> <dbl> <chr> <dbl> <dbl>
## 1 849648 1441168 AL 32.3 -86.9 Alabama 2.29e6 0.6
## 2 153778 189951 AK 63.6 -154. Alaska 3.44e5 0.553
## 3 1672143 1661686 AZ 34.0 -111. Arizona 3.33e6 0.498
## 4 423932 760647 AR 35.2 -91.8 Arkansas 1.18e6 0.6
## 5 11109764 6005961 CA 36.8 -119. California 1.71e7 0.4
## 6 1804352 1364607 CO 39.6 -106. Colorado 3.17e6 0.431
states %>% head()
## long lat group order region subregion
## 1 -87.46201 30.38968 1 1 alabama <NA>
## 2 -87.48493 30.37249 1 2 alabama <NA>
## 3 -87.52503 30.37249 1 3 alabama <NA>
## 4 -87.53076 30.33239 1 4 alabama <NA>
## 5 -87.57087 30.32665 1 5 alabama <NA>
## 6 -87.58806 30.32665 1 6 alabama <NA>
US2020_Election$region <- tolower(US2020_Election$State_Name) # small letters
mapdata <- merge(states, US2020_Election, by = "region")
#View(mapdata)
ggplot(mapdata) +
geom_polygon(aes(x=long,y=lat,group=group, fill=Trump_Percent)) +
coord_map() + theme_bw()
ggplot(mapdata) +
geom_polygon(aes(x=long,y=lat,group=group, fill=Trump_Percent)) +
coord_map() + theme_bw() +
scale_fill_gradient2(low="blue", mid='purple', high="red", midpoint=0.5,
limits=c(.4,.6), name='Rep %')
tmaptmap is a powerful and flexible map-making package. It has a concise syntax that allows for the creation of attractive maps with minimal code which will be familiar to ggplot2 users.
For more info, see tmap-getstarted, Making maps with R, as well as Tennekes (2018).
library(sf)
library(tigris) # download shp file from U.S. census website
library(tmaptools)
library(tmap)
download.file("http://www2.census.gov/geo/tiger/GENZ2015/shp/cb_2015_us_state_20m.zip", destfile = "states.zip")
unzip("states.zip")
Merge data, and delete the data.
dplyr::inner_join(): includes all rows in x and y.
dplyr::left_join(): includes all rows in x.
dplyr::right_join(): includes all rows in y.
dplyr::full_join(): includes all rows in x or y.
To join by different variables on x and y, use a named vector. For example, by = c(“a” = “b”) will match x\(a to y\)b.
usmap <- st_read("cb_2015_us_state_20m.shp", stringsAsFactors = FALSE)
## Reading layer `cb_2015_us_state_20m' from data source
## `/Users/youmisuk/Desktop/DS3003/W10 - Maps/cb_2015_us_state_20m.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 52 features and 9 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -179.1743 ymin: 17.91377 xmax: 179.7739 ymax: 71.35256
## Geodetic CRS: NAD83
usdata <- inner_join(usmap, US2020_Election, by=c("STUSPS" = "State_Abbreviation"))
str(usdata)
## Classes 'sf' and 'data.frame': 51 obs. of 18 variables:
## $ STATEFP : chr "48" "06" "21" "13" ...
## $ STATENS : chr "01779801" "01779778" "01779786" "01705317" ...
## $ AFFGEOID : chr "0400000US48" "0400000US06" "0400000US21" "0400000US13" ...
## $ GEOID : chr "48" "06" "21" "13" ...
## $ STUSPS : chr "TX" "CA" "KY" "GA" ...
## $ NAME : chr "Texas" "California" "Kentucky" "Georgia" ...
## $ LSAD : chr "00" "00" "00" "00" ...
## $ ALAND : num 6.77e+11 4.03e+11 1.02e+11 1.49e+11 1.40e+11 ...
## $ AWATER : num 1.90e+10 2.05e+10 2.39e+09 4.74e+09 2.94e+10 ...
## $ Biden : num 5259126 11109764 772474 2473707 1630866 ...
## $ Trump : num 5890347 6005961 1326646 2461779 1610184 ...
## $ Lat : num 32 36.8 37.8 32.2 43.8 ...
## $ Lon : num -99.9 -119.4 -84.3 -82.9 -88.8 ...
## $ State_Name : chr "Texas" "California" "Kentucky" "Georgia" ...
## $ Total : num 11149473 17115725 2099120 4935486 3241050 ...
## $ Trump_Percent: num 0.528 0.4 0.6 0.499 0.497 ...
## $ region : chr "texas" "california" "kentucky" "georgia" ...
## $ geometry :sfc_MULTIPOLYGON of length 51; first list element: List of 1
## ..$ :List of 1
## .. ..$ : num [1:552, 1:2] -107 -107 -107 -106 -106 ...
## ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "names")= chr [1:17] "STATEFP" "STATENS" "AFFGEOID" "GEOID" ...
class(usdata)
## [1] "sf" "data.frame"
usdata2 <- usdata %>% filter(!STUSPS %in% c("PR", "HI", "AK")) # delete Puerto Rico, Hawaii, and Alaska
sf class with ggplot2.ggplot(usdata2) + geom_sf(aes(fill=Trump_Percent)) + theme_bw() +
scale_fill_gradient(low="beige", high="red", limits=c(.4,.6), name='Rep %')
tmap.tm_shape(usdata2) + tm_polygons(col = "Trump_Percent", id="STUSPS", palette = "Reds", title = "Rep %")